//////////////////////////////////////////////////////////////
// TITLE: 			Reshape Cut Groups
// DESCRIPTION:
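// Approximate an equal split of MasterPortfolioIds by complexity, measured
// here as the number of rows each id has in the yearly HD files.
// Bin the MasterPortfolioIds into $groups groups (e.g. 100), then fill in the
// unobserved ids between observed ones so every id in range maps to a bin.
// Save the resulting id-to-bin mapping as $temp/mpid_list.dta.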
//////////////////////////////////////////////////////////////

// Stack the yearly HD files, reading only the two variables listed in keep().
// NOTE(review): -append- adds to whatever is already in memory; this assumes
// the caller starts from an empty dataset -- TODO confirm.
forval year=2005/2020 {
    append using "$morningstar_hd/HD_`year'", keep(MasterPortfolioId date_m)
}

// date_m is not used anywhere below; drop it before sorting to save memory.
drop date_m
sort MasterPortfolioId

// Cut the sorted rows into $groups consecutive bins of (nearly) equal row
// count: row i belongs to bin ceil(i / group_size). This is computed straight
// from _n and _N instead of from a stored counter variable, because a
// default-type (float) counter is only exact up to 2^24 = 16,777,216 rows and
// would blur the bin boundaries on larger data. It also replaces $groups
// passes over the data with a single one.
// groupnum keeps the default storage type so the saved file's schema is the
// same as before; bin numbers are small integers and therefore exact.
local group_size = ceil(_N/$groups)
gen groupnum = ceil(_n/`group_size')

// Reduce to one row per MasterPortfolioId. An id whose rows straddle a bin
// boundary ends up with the bin of whichever of its rows is last in the
// current sort order.
bysort MasterPortfolioId: keep if _n == _N

// Fill the holes in the id sequence so that every integer id between the
// smallest and largest observed id is mapped to a bin.
// gap = distance to the next observed id. Stored as double so that large id
// differences are held exactly (the default float is exact only up to 2^24).
gen double gap = MasterPortfolioId[_n+1] - MasterPortfolioId
replace gap = 0 if missing(gap)     // last row has no successor: nothing to fill

// -expand- leaves `gap' total copies of each row (values <= 1 leave the row
// as is). The copies carry the original row's id and groupnum.
expand gap
sort MasterPortfolioId

// -replace- works through the observations in order, so each copy sees the
// already-updated id of the row above it and becomes id+1, id+2, ... up to
// (but not including) the next observed id.
replace MasterPortfolioId = MasterPortfolioId[_n-1] + 1 if MasterPortfolioId <= MasterPortfolioId[_n-1] & !missing(MasterPortfolioId[_n-1])
sort MasterPortfolioId

// Safeguard: carry the previous row's bin forward onto any row without one.
// The variable is spelled out in full; the former spelling "group" only
// worked through Stata's variable-name abbreviation.
replace groupnum = groupnum[_n-1] if missing(groupnum)
drop gap
duplicates drop

save "$temp/mpid_list.dta", replace
